In the pass1_annotation script there were some that were hard to identify and could potentially be artifact. The goal of this script is to subset the ‘tangled’ clusters and further inspect.
knitr::opts_knit$set(root.dir = ".")
library(dplyr) # ungroup()
library(ggtree) # BuildClusterTree()
library(gridExtra) # grid.arrange()
library(gtools) # smartbind()
library(parallel) # detectCores()
library(plotly) # plot_ly()
library(Seurat) # Idents()
library(ShinyCell) # createConfig()
library(tidyr) # %>%
out <- "../../results/pass_1/recluster_group_1/"
sample_order <- c("E3.M","E3.F","E4.M","E4.F")
sample_colors <- c("#26946A","#1814A1","#EAC941","#DF5F00")
sample_order2 <- c("Male E3", "Male E4", "Female E3", "Female E4")
isoform_order <- c("E4","E3")
isoform_colors <- c("darkgray","cornflowerblue")
sex_order <- c("Male","Female")
sex_colors <- c("darkgray","purple")
mouse.unannotated <- readRDS("../../rObjects/pass1_unannotated.rds")
Idents(mouse.unannotated) <- "seurat_clusters"
DefaultAssay(mouse.unannotated) <- "RNA"
mouse.unannotated <- NormalizeData(mouse.unannotated)
## Normalizing layer: counts
cluster_colors <- c("black","gray40","gray","red1","blue","magenta1","darkorange2",
"darkorange4","yellow1","yellow4","yellow2","green","lightgreen",
"chartreuse1","Aquamarine","cyan","SteelBlue","red4","forestgreen",
"purple1","purple4","orange","plum1","salmon","tan","chocolate4")
DimPlot(object = mouse.unannotated,
reduction = "umap",
shuffle = TRUE,
repel = TRUE,
cols = cluster_colors,
label = TRUE)
Group 1 consists of clusters 0, 2, 3, 4, 8, 9, 11, 13, 16, 19, 21, 25. ## Subset and recluster
# subset to group 1
mouse.unannotated$OG_unannotated_clusters <- mouse.unannotated$seurat_clusters
myClusters <- c(0, 2, 3, 4, 8, 9, 11, 13, 16, 19, 21, 25)
keepCells <- mouse.unannotated@meta.data[,"seurat_clusters"] %in% myClusters
keepCol <- c(4:14,23)
meta <- mouse.unannotated@meta.data[keepCells,keepCol]
counts <- LayerData(mouse.unannotated, assay = "RNA", layer = "counts")[,keepCells]
group1 <- CreateSeuratObject(counts,
project = "Recluster Group 1",
meta.data = meta)
remove(meta)
# filter lowly expressed genes
counts <- GetAssayData(object = group1, layer = "counts")
nonzero <- counts > 0 # produces logical
keep <- Matrix::rowSums(nonzero) >= 10 # sum the true/false
counts.filtered <- counts[keep,]
group1 <- CreateSeuratObject(counts.filtered, meta.data = group1@meta.data)
# overwrite group1
group1 <- CreateSeuratObject(counts.filtered,
meta.data = group1@meta.data)
remove(counts,nonzero,keep,counts.filtered)
# sctransform, pca, harmony
group1[["RNA"]] <- split(group1[["RNA"]], f = group1$sample)
group1 <- SCTransform(group1, verbose = FALSE)
group1 <- RunPCA(group1, assay = "SCT")
## Warning in PrepDR(object = object, features = features, verbose = verbose): The
## following 28 features requested have not been scaled (running reduction without
## them): Pax5, D430041D05Rik, Ibsp, Ighv1-55, H2-M9, Nxph2, Ighg1, Igkv15-103,
## Igkv6-15, Gm30613, Ighv1-82, Slc26a4, Atp6v1c2, Cyp2e1, Gm30275, Fut9,
## Ighv1-39, Igkv6-23, Kcnj3, Tnn, Igkv3-4, Adamdec1, Igkv4-55, Cd8b1, Itprid1,
## Ighv1-26, D630024D03Rik, Nxph1
## PC_ 1
## Positive: Gpc6, Slc4a10, Cdh11, Foxp2, Bnc2, Col12a1, Tenm3, Igfbp5, Cacna1c, Ptprd
## Mir100hg, Slc38a2, Epha3, Col1a2, Slit2, Ctnnd2, Ank2, Egfr, Runx2, Eya2
## Zfhx4, Slc47a1, Prrx1, Pdzrn3, Adam12, Lama2, Gm973, Pappa, Snhg11, Kctd8
## Negative: Flt1, Igfbp3, Plcb1, Ldb2, Ptprb, Cyyr1, Kdr, Ptprg, Emcn, Adgrl4
## Arl15, C1qa, Podxl, Lyz2, Plvap, Insr, Plpp1, Pecam1, Vwf, St6galnac3
## Stox2, C1qb, Fabp4, Pf4, Ly6c1, Dysf, Cd74, Ctla2a, Ablim1, Rapgef4
## PC_ 2
## Positive: C1qa, Lyz2, C1qb, Pf4, Mrc1, C1qc, Apoe, Ccl8, Cd74, Fcer1g
## Tyrobp, Ftl1, F13a1, Ctss, Csf1r, Trf, H2-Eb1, H2-Aa, Aif1, Ctsb
## Ifi27l2a, Fcrls, Selenop, H2-Ab1, Cst3, Wfdc17, Ms4a7, Fth1, Laptm5, Cd209f
## Negative: Plcb1, Flt1, Ldb2, Ptprg, Arl15, Igfbp3, Ptprb, Cyyr1, Ptprm, Emcn
## Plpp1, Mecom, Prkg1, Insr, Adgrl4, Stox2, Kdr, Vwf, St6galnac3, Hmcn1
## Podxl, Dysf, Tmtc2, Heg1, Prex2, Pcdh7, Etl4, Plvap, Pecam1, Samd12
## PC_ 3
## Positive: Slc4a10, Cdh11, Bnc2, Col12a1, Foxp2, Slc38a2, Slc47a1, Ctnnd2, Kctd8, Epha3
## Igfbp5, Slit2, Slc26a7, Pappa, H19, Gm973, Eya2, Tspan11, Snhg11, Adamtsl3
## Boc, Igf2, Adamts9, Slc23a2, Adamtsl1, Smoc1, Egfr, Runx2, Ecrg4, Col1a1
## Negative: Dmd, Ctnna3, Myh11, Acta2, Dgkb, Tagln, Cacnb2, Kcnab1, Notch3, Myl9
## Sox5, Pde3a, Tpm2, Nkain2, Sox6, Rgs6, Csmd1, Lmod1, Ank3, Kcna1
## Sntb1, Rcan2, Synpo2, Tpm1, Cdh19, Plp1, Carmn, Rgs5, Slc35f1, Slc38a11
## PC_ 4
## Positive: Myh11, Acta2, Dgkb, Notch3, Tagln, Kcnab1, Cacnb2, Myl9, Tpm2, Pde3a
## Lmod1, Rcan2, Tpm1, Rgs6, Rgs5, Carmn, Sox5, Mustn1, Slc38a11, Gucy1a1
## Myom1, Crip1, Ryr2, Pln, Aspn, Stac, Synpo2, Ebf2, Filip1l, Dmd
## Negative: Nkain2, Kcna1, Ank3, Cdh19, Plp1, Cadm2, Gpm6b, Csmd1, Mbp, Dlgap1
## Pcdh9, Zfp536, Adam23, Scn7a, Gm10863, Abca8a, Mpz, Lgi4, Slc35f1, Adgrl3
## Stard13, Kcna2, Sox10, Kcna6, Chl1, Il1rapl1, Gfra3, Sgcd, Lrrc4c, Fign
## PC_ 5
## Positive: Arhgap15, Dock2, Inpp4b, Cacna1e, Tmem163, Aff3, Ikzf1, Ptprc, Card11, Blnk
## Slc9a9, Rabgap1l, Bcl11a, Dock10, Zdhhc14, Fyb, Gpc6, Cmah, Mctp2, Irf8
## Runx2, Ripor2, Igkc, Siglech, Kcnq5, Gm30211, Pik3ap1, Ighm, Fhit, Kynu
## Negative: Mgp, Igfbp5, Ecrg4, Igf2, Ogn, Rps21, H19, Col1a2, Dcn, Rps29
## Gpx3, Rps8, Rpl30, Mfap4, Rpl37a, Fmod, Rps12, Rps20, Rpl23, Serpinf1
## Bgn, Tpt1, Rpl37, Col1a1, Cpxm1, Rps27a, Rpl39, Rpl27a, Rpl35a, Rps27
group1 <- IntegrateLayers(object = group1,
method = HarmonyIntegration,
orig.reduction = "pca",
assay = "SCT")
## Warning: HarmonyMatrix is deprecated and will be removed in the future from the
## API in the future
## Warning: Warning: The parameters do_pca and npcs are deprecated. They will be ignored for this function call and please remove parameters do_pca and npcs and pass to harmony cell_embeddings directly.
## This warning is displayed once per session.
## Warning: Warning: The parameter tau is deprecated. It will be ignored for this function call and please remove parameter tau in future function calls. Advanced users can set value of parameter tau by using parameter .options and function harmony_options().
## This warning is displayed once per session.
## Warning: Warning: The parameter block.size is deprecated. It will be ignored for this function call and please remove parameter block.size in future function calls. Advanced users can set value of parameter block.size by using parameter .options and function harmony_options().
## This warning is displayed once per session.
## Warning: Warning: The parameter max.iter.harmony is replaced with parameter max_iter. It will be ignored for this function call and please use parameter max_iter in future function calls.
## This warning is displayed once per session.
## Warning: Warning: The parameter max.iter.cluster is deprecated. It will be ignored for this function call and please remove parameter max.iter.cluster in future function calls. Advanced users can set value of parameter max.iter.cluster by using parameter .options and function harmony_options().
## This warning is displayed once per session.
## Warning: Warning: The parameter epsilon.cluster is deprecated. It will be ignored for this function call and please remove parameter epsilon.cluster in future function calls. Advanced users can set value of parameter epsilon.cluster by using parameter .options and function harmony_options().
## This warning is displayed once per session.
## Warning: Warning: The parameter epsilon.harmony is deprecated. It will be ignored for this function call and please remove parameter epsilon.harmony in future function calls. If users want to control if harmony would stop early or not, use parameter early_stop. Advanced users can set value of parameter epsilon.harmony by using parameter .options and function harmony_options().
## This warning is displayed once per session.
## Transposing data matrix
## Using automatic lambda estimation
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony converged after 4 iterations
# plot harmony
DimPlot(group1,
group.by = "sample",
reduction = "harmony",
cols = sample_colors,
shuffle = TRUE)
# find number of dims to include
group1@reductions$harmony@stdev <-
apply(group1@reductions$harmony@cell.embeddings, 2, sd)
ElbowPlot(group1,
reduction = "harmony") +
geom_vline(xintercept = 20, linetype = "dashed", color = "red")
# umap
group1 <- RunUMAP(group1,
dims = 1:20,
reduction = "harmony",
n.components = 3)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 15:55:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:55:27 Read 11597 rows and found 20 numeric columns
## 15:55:27 Using Annoy for neighbor search, n_neighbors = 30
## 15:55:27 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:55:28 Writing NN index file to temp file /tmp/Rtmp7QUlAC/file4ef376407d8b9
## 15:55:28 Searching Annoy index using 1 thread, search_k = 3000
## 15:55:32 Annoy recall = 100%
## 15:55:33 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 15:55:34 Initializing from normalized Laplacian + noise (using RSpectra)
## 15:55:34 Commencing optimization for 200 epochs, with 486710 positive edges
## 15:55:40 Optimization finished
# classification
group1 <- FindNeighbors(object = group1,
assay = "SCT",
reduction = "harmony",
dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
group1 <- FindClusters(object = group1,
algorithm = 1, # 1 = Louvain
resolution = seq(0.1, 0.5,by = 0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11597
## Number of edges: 380215
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9708
## Number of communities: 13
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11597
## Number of edges: 380215
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9586
## Number of communities: 16
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11597
## Number of edges: 380215
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9486
## Number of communities: 17
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11597
## Number of edges: 380215
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9397
## Number of communities: 18
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11597
## Number of edges: 380215
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9319
## Number of communities: 19
## Elapsed time: 1 seconds
group1$seurat_clusters <- group1$SCT_snn_res.0.1
# cleanup env
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 8551528 456.8 16665972 890.1 NA 16665972 890.1
## Vcells 519096939 3960.4 958303054 7311.3 102400 958302172 7311.3
DefaultAssay(group1) <- "RNA"
group1 <- JoinLayers(group1)
group1 <- NormalizeData(group1)
## Normalizing layer: counts
Idents(group1) <- "seurat_clusters"
cluster_colors <- c("black","gray","chocolate4","red","gold","green","forestgreen",
"cyan","blue","steelblue","pink","purple","deeppink")
u1 <- DimPlot(group1,
group.by = "seurat_clusters",
label = TRUE,
cols = cluster_colors)
u1
df <- as.data.frame(table(group1$seurat_clusters))
colnames(df) <- c("cluster","cell_count")
write.table(df,
file = paste0(out, "clustering_QC/unannotated_cells_per_cluster.tsv"),
quote = FALSE,
row.names = FALSE,
sep = "\t")
remove(df)
# UMAP percent.mt
f1 <- FeaturePlot(group1,
reduction = "umap",
features = "percent.mt") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f1
# UMAP percent.ribo
f2 <- FeaturePlot(group1,
reduction = "umap",
features = "percent.ribo.protein") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f2
# UMAP percent.hb
f3 <- FeaturePlot(group1,
reduction = "umap",
features = "percent.hb") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f3
# UMAP nCount
f4 <- FeaturePlot(group1,
reduction = "umap",
features = "nCount_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f4
# UMAP nFeature
f5 <- FeaturePlot(group1,
reduction = "umap",
features = "nFeature_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f5
# UMAP cell.complexity
f6 <- FeaturePlot(group1,
reduction = "umap",
features = "cell.complexity") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f6
# UMAP percent.ribo
f7 <- FeaturePlot(group1,
reduction = "umap",
features = "percent.ribo.protein") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f7
d1 <- DimPlot(group1,
group.by = "sample",
shuffle = TRUE,
raster = FALSE,
cols = sample_colors)
d1
d2 <- DimPlot(group1,
split.by = "sample",
group.by = "seurat_clusters",
ncol = 2,
cols = cluster_colors,
shuffle = TRUE,
raster = FALSE)
d2
d3 <- DimPlot(group1,
split.by = "sex",
group.by = "seurat_clusters",
ncol = 2,
cols = cluster_colors,
shuffle = TRUE,
raster = FALSE)
d3
d4 <- DimPlot(group1,
group.by = "sex",
cols = c("green","purple"),
shuffle = TRUE,
raster = FALSE)
d4
d5 <- DimPlot(group1,
split.by = "isoform",
group.by = "seurat_clusters",
ncol = 2,
cols = cluster_colors,
shuffle = TRUE,
raster = FALSE)
d5
d6 <- DimPlot(group1,
group.by = "isoform",
cols = c("blue","red"),
shuffle = TRUE,
raster = FALSE)
d6
d7 <- DimPlot(group1,
group.by = "OG_unannotated_clusters",
cols = c("black","gray","red","blue","yellow","yellow4","green",
"aquamarine","steelblue","purple","coral","chocolate4"),
shuffle = TRUE,
raster = FALSE)
d7
pdf(paste0(out, "clustering_QC/umap_colored_by_sample.pdf"), width = 8, height = 6)
d1
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_split_by_sample.pdf"), width = 8, height = 6)
d2
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_split_by_sex.pdf"), width = 8, height = 6)
d3
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_colored_by_sex.pdf"), width = 8, height = 6)
d4
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_split_by_isoform.pdf"), width = 8, height = 6)
d5
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_colored_by_isoform.pdf"), width = 8, height = 6)
d6
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_colored_by_OG_pass1_unannotated_clusters.pdf"), width = 8, height = 6)
d7
dev.off()
## png
## 2
remove(d1,d2,d3,d4,d5,d6,d7)
Idents(group1) <- "seurat_clusters"
markers <- SeuratWrappers::RunPrestoAll(
object = group1,
assay = "RNA",
slot = "counts",
only.pos = FALSE
)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
markers <- markers[markers$p_val_adj < 0.01,]
top3 <- Reduce(rbind,
by(markers,
markers["cluster"],
head,
n = 3))
top10 <- Reduce(rbind,
by(markers,
markers["cluster"],
head,
n = 10))
Idents(group1) <- "seurat_clusters"
group1 <- NormalizeData(group1)
## Normalizing layer: counts
DefaultAssay(group1) <- "RNA"
v1 <- VlnPlot(group1,
group.by = "seurat_clusters",
split.by = "seurat_clusters",
stack = TRUE,
flip = TRUE,
cols = cluster_colors,
features = top3$gene)
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
v1
v2 <- VlnPlot(group1,
group.by = "seurat_clusters",
split.by = "seurat_clusters",
stack = TRUE,
flip = TRUE,
cols = cluster_colors,
features = c("Flt1","Ptprb","C1qa","Mrc1","Col1a1","Col1a2","Notch3",
"Rgs5","Kit","Fcer1a","Cdh19","Kcna1","Prox1","Flt4"))
v2
# Rename all identities
Idents(group1) <- "seurat_clusters"
group1.annotated <- RenameIdents(object = group1,
"0" = "Ambient RNAs", # ?
"1" = "BECs", # Flt1,Ptprb
"2" = "Macrophages", # C1qa,Mrc1
"3" = "Fibroblasts", # Col1a1,Col1a2
"4" = "BECs", # Flt1,Ptprb
"5" = "BECs", # Flt1,Ptprb
"6" = "Pericytes & SMCs", # Notch3,Rgs5
"7" = "Mast cells?", # Kit?
"8" = "Doublets", # Mpz,Rgs5,C1qa,Col1a1
"9" = "Unknown 1", # ?
"10" = "Schwann cells", # Cdh19,Kcna1
"11" = "LECs", # Flt4,Prox1
"12" = "Unknown 2") # ?
group1.annotated$annotated_clusters <- Idents(group1)
Idents(group1.annotated) <- "annotated_clusters"
# set colors
cluster_colors <- c("black","gray","chocolate4","red","gold","green","forestgreen",
"cyan","blue","steelblue","pink","purple","deeppink")
# umap
umap <- DimPlot(object = group1.annotated,
reduction = "umap",
repel = TRUE,
group.by = "annotated_clusters",
cols = cluster_colors)
umap
# change out dir
out <- "../../results/pass_1/recluster_group_2/"
# subset to group 2
myClusters <- c(1,5,6,7,10,17,22,23,24)
keepCells <- mouse.unannotated@meta.data[,"seurat_clusters"] %in% myClusters
keepCol <- c(4:14,23)
meta <- mouse.unannotated@meta.data[keepCells,keepCol]
counts <- LayerData(mouse.unannotated, assay = "RNA", layer = "counts")[,keepCells]
group2 <- CreateSeuratObject(counts,
project = "Recluster Group 2",
meta.data = meta)
remove(mouse.unanotated,meta)
## Warning in remove(mouse.unanotated, meta): object 'mouse.unanotated' not found
# filter lowly expressed genes
counts <- GetAssayData(object = group2, layer = "counts")
nonzero <- counts > 0 # produces logical
keep <- Matrix::rowSums(nonzero) >= 10 # sum the true/false
counts.filtered <- counts[keep,]
group2 <- CreateSeuratObject(counts.filtered, meta.data = group2@meta.data)
# overwrite group2
group2 <- CreateSeuratObject(counts.filtered,
meta.data = group2@meta.data)
remove(counts,nonzero,keep,counts.filtered)
# sctransform, pca, harmony
group2[["RNA"]] <- split(group2[["RNA"]], f = group2$sample)
group2 <- SCTransform(group2, verbose = FALSE)
group2 <- RunPCA(group2, assay = "SCT")
## Warning in PrepDR(object = object, features = features, verbose = verbose): The
## following 40 features requested have not been scaled (running reduction without
## them): Gm32569, Rag1, Igll1, Hcrtr2, Vpreb1, Gm47782, 1810046K07Rik, Gm31392,
## Il12b, 8030451A03Rik, Fmn2, Adarb2, Oprd1, Trpc3, Bfsp2, 1810059H22Rik,
## 5830411N06Rik, 5830418P13Rik, Gm34095, Dntt, Arpp21, Gm30948, Gm20756,
## Tmem132e, Gm49439, Ighg1, Epb41l4b, A330093E20Rik, Gm39383, Dkk2, Igkv8-19,
## Vpreb2, Pgpep1l, D030045P18Rik, Gm595, Lgr5, Rag2, Gm20743, Hs3st5, Sh3rf2
## PC_ 1
## Positive: Ebf1, Aff3, Pax5, Cd79a, Bach2, Tmem163, Ikzf3, Ms4a1, Kcnq5, Arhgap24
## Ly6d, Cacna1e, Cd79b, Bcl11a, Chst3, Agbl1, Sox5, Ripor2, Myo1e, Skap1
## Vpreb3, Btla, Ablim1, Cecr2, Gm30211, Iglc2, St6galnac3, Klhl14, Ets1, Iglc3
## Negative: Mrc1, F13a1, Apoe, Dab2, Stab1, Mctp1, C1qa, Slc9a9, Mir99ahg, Pf4
## Pid1, Rbpj, C1qb, C1qc, Lyz2, Cd163, Itsn1, Rnf150, Tanc2, Selenop
## Slc8a1, Ophn1, Aoah, Maf, Cfh, Psd3, Wwp1, Ccl8, Csf1r, Ms4a7
## PC_ 2
## Positive: Ebf1, Pax5, Cd79a, Aff3, Ms4a1, Bach2, Ly6d, Tmem163, Arhgap24, Bank1
## Cd79b, Agbl1, Chst3, Vpreb3, Cacna1e, Sox5, Cecr2, Bcl11a, Ppm1e, Klhl14
## Ighm, Iglc2, Gm30211, Ralgps2, Myo1e, Iglc3, Dennd5b, Akap12, Pou2af1, Fcmr
## Negative: Skap1, Gm2682, Tox, Themis, Ms4a4b, Cd247, Bcl11b, Prkcq, Tnik, Itk
## Cd226, Ccl5, Sntb1, Camk4, Il18r1, Fyn, Stat4, St6galnac3, Ikzf2, Trbc2
## Cd3g, Il7r, Lck, Btbd11, Cd3e, Samd3, Slco3a1, Nkg7, Grap2, Clnk
## PC_ 3
## Positive: Skap1, Mrc1, Stab1, Bank1, Gm2682, Tox, Ebf1, Ikzf3, Themis, Cd163
## Dab2, Slc9a9, Ccl8, Aff3, Tmem163, Pf4, Pax5, Kcnq5, Prkcq, Apoe
## Cd209f, Mir99ahg, Itsn1, Ms4a4b, Bcl11b, Cd247, Igf1, Ripor2, St6galnac3, Atxn1
## Negative: Alcam, S100a4, Ccr2, Gpr141, Lgals3, S100a6, Fn1, Fgr, Crip1, Plbd1
## Plcb1, Plac8, Cd74, H2-Eb1, Gda, H2-Ab1, Vim, Lyz2, Ifitm3, Trerf1
## Lsp1, H2-Aa, Klra2, Ly6c2, Cers6, Arhgef37, Gpx1, Mki67, Rap1gap2, Stxbp6
## PC_ 4
## Positive: Mki67, Top2a, Knl1, Diaph3, Kif11, Neil3, Cenpf, Cenpp, Cenpe, Kif15
## Prc1, Kif4, Nusap1, Lockd, Hmmr, Aspm, Ezh2, Tpx2, Cit, Tfrc
## Mis18bp1, Hist1h1b, Kif23, Anln, Kif14, Ank1, Sox6, Smc2, Slc25a21, Sptb
## Negative: Cd74, H2-Eb1, H2-Ab1, H2-Aa, S100a4, Lyz2, Ms4a1, Cd52, S100a6, Tmsb4x
## Gm15987, Fau, Ifitm3, Bank1, Rps24, Tmem163, Ccr2, Ly6d, Plac8, Lgals3
## Crip1, Actb, Rps9, Rpl13, Lsp1, Myo1e, Cd79a, Rps11, Rps27, Arhgap24
## PC_ 5
## Positive: Lyz2, S100a6, Tmsb10, Ifitm3, S100a4, Plac8, Ly6c2, Fn1, Ifi27l2a, Pf4
## Tmsb4x, Ccl8, Lgals3, F13a1, Plcb1, Hmgb2, Ftl1, Crip1, Hp, Tyrobp
## Tpt1, Fau, Rps24, Rps29, Gpx1, Rps16, Ptma, Rps9, Arhgef37, Rpl32
## Negative: Alcam, Fmnl2, Cadm1, Trerf1, Wdfy4, Srgap3, Flt3, Ccser1, Cdk14, Ciita
## Zfp366, Zbtb46, Gm36723, Itpr1, Clec9a, Dpp4, Tbc1d9, Chn2, Rtn1, Pid1
## Tbc1d8, Tgfbr1, Arsb, Phactr2, Plbd1, 1700025G04Rik, Dapk1, Kynu, Ctnnd2, Lrrk2
group2 <- IntegrateLayers(object = group2,
method = HarmonyIntegration,
orig.reduction = "pca",
assay = "SCT")
## Warning: HarmonyMatrix is deprecated and will be removed in the future from the
## API in the future
## Transposing data matrix
## Using automatic lambda estimation
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony converged after 3 iterations
# plot harmony
DimPlot(group2,
group.by = "sample",
reduction = "harmony",
cols = sample_colors,
shuffle = TRUE)
# find number of dims to include
group2@reductions$harmony@stdev <-
apply(group2@reductions$harmony@cell.embeddings, 2, sd)
ElbowPlot(group2,
reduction = "harmony") +
geom_vline(xintercept = 20, linetype = "dashed", color = "red")
# umap
group2 <- RunUMAP(group2,
dims = 1:20,
reduction = "harmony",
assay = "SCT",
n.components = 3)
## 15:57:53 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:57:53 Read 7458 rows and found 20 numeric columns
## 15:57:53 Using Annoy for neighbor search, n_neighbors = 30
## 15:57:53 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:57:54 Writing NN index file to temp file /tmp/Rtmp7QUlAC/file4ef3725296b62
## 15:57:54 Searching Annoy index using 1 thread, search_k = 3000
## 15:57:56 Annoy recall = 100%
## 15:57:57 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 15:57:58 Initializing from normalized Laplacian + noise (using RSpectra)
## 15:57:58 Commencing optimization for 500 epochs, with 303046 positive edges
## 15:58:06 Optimization finished
# classification
group2 <- FindNeighbors(object = group2,
assay = "SCT",
reduction = "harmony",
dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
group2 <- FindClusters(object = group2,
algorithm = 1, # 1 = Louvain
resolution = seq(0.1, 0.5,by = 0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7458
## Number of edges: 244515
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9645
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7458
## Number of edges: 244515
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9448
## Number of communities: 12
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7458
## Number of edges: 244515
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9316
## Number of communities: 18
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7458
## Number of edges: 244515
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9221
## Number of communities: 19
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7458
## Number of edges: 244515
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9141
## Number of communities: 21
## Elapsed time: 0 seconds
group2$seurat_clusters <- group2$SCT_snn_res.0.1
# cleanup env
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 8752010 467.5 16665972 890.1 NA 16665972 890.1
## Vcells 728889387 5561.0 1380132396 10529.6 102400 1149940781 8773.4
DefaultAssay(group2) <- "RNA"
group2 <- JoinLayers(group2)
group2 <- NormalizeData(group2)
## Normalizing layer: counts
Idents(group2) <- "seurat_clusters"
cluster_colors <- c("blue","red","green","gold","purple","cyan","gray","black")
u1 <- DimPlot(group2,
group.by = "seurat_clusters",
label = TRUE,
cols = cluster_colors)
u1
df <- as.data.frame(table(group2$seurat_clusters))
colnames(df) <- c("cluster","cell_count")
write.table(df,
file = paste0(out, "clustering_QC/unannotated_cells_per_cluster.tsv"),
quote = FALSE,
row.names = FALSE,
sep = "\t")
remove(df)
# UMAP percent.mt
f1 <- FeaturePlot(group2,
reduction = "umap",
features = "percent.mt") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f1
# UMAP percent.ribo
f2 <- FeaturePlot(group2,
reduction = "umap",
features = "percent.ribo.protein") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f2
# UMAP percent.hb
f3 <- FeaturePlot(group2,
reduction = "umap",
features = "percent.hb") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f3
# UMAP nCount
f4 <- FeaturePlot(group2,
reduction = "umap",
features = "nCount_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f4
# UMAP nFeature
f5 <- FeaturePlot(group2,
reduction = "umap",
features = "nFeature_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f5
# UMAP cell.complexity
f6 <- FeaturePlot(group2,
reduction = "umap",
features = "cell.complexity") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f6
# UMAP percent.ribo
f7 <- FeaturePlot(group2,
reduction = "umap",
features = "percent.ribo.protein") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f7
d1 <- DimPlot(group2,
group.by = "sample",
shuffle = TRUE,
raster = FALSE,
cols = sample_colors)
d1
d2 <- DimPlot(group2,
split.by = "sample",
group.by = "seurat_clusters",
ncol = 2,
cols = cluster_colors,
shuffle = TRUE,
raster = FALSE)
d2
d3 <- DimPlot(group2,
split.by = "sex",
group.by = "seurat_clusters",
ncol = 2,
cols = cluster_colors,
shuffle = TRUE,
raster = FALSE)
d3
d4 <- DimPlot(group2,
group.by = "sex",
cols = c("green","purple"),
shuffle = TRUE,
raster = FALSE)
d4
d5 <- DimPlot(group2,
split.by = "isoform",
group.by = "seurat_clusters",
ncol = 2,
cols = cluster_colors,
shuffle = TRUE,
raster = FALSE)
d5
d6 <- DimPlot(group2,
group.by = "isoform",
cols = c("blue","red"),
shuffle = TRUE,
raster = FALSE)
d6
d7 <- DimPlot(group2,
group.by = "OG_unannotated_clusters",
cols = c("gray40","deeppink","orange2","chocolate4","yellow3",
"red4","pink","coral","burlywood3"),
shuffle = TRUE,
raster = FALSE)
d7
pdf(paste0(out, "clustering_QC/umap_colored_by_sample.pdf"), width = 8, height = 6)
d1
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_split_by_sample.pdf"), width = 8, height = 6)
d2
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_split_by_sex.pdf"), width = 8, height = 6)
d3
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_colored_by_sex.pdf"), width = 8, height = 6)
d4
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_split_by_isoform.pdf"), width = 8, height = 6)
d5
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_colored_by_isoform.pdf"), width = 8, height = 6)
d6
dev.off()
## png
## 2
pdf(paste0(out, "clustering_QC/umap_colored_by_OG_pass1_unannotated_clusters.pdf"), width = 8, height = 6)
d7
dev.off()
## png
## 2
remove(d1,d2,d3,d4,d5,d6,d7)
Idents(group2) <- "seurat_clusters"
markers <- SeuratWrappers::RunPrestoAll(
object = group2,
assay = "RNA",
slot = "counts",
only.pos = FALSE
)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
markers <- markers[markers$p_val_adj < 0.01,]
top3 <- Reduce(rbind,
by(markers,
markers["cluster"],
head,
n = 3))
top10 <- Reduce(rbind,
by(markers,
markers["cluster"],
head,
n = 10))
Idents(group2) <- "seurat_clusters"
group2 <- NormalizeData(group2)
## Normalizing layer: counts
DefaultAssay(group2) <- "RNA"
v1 <- VlnPlot(group2,
group.by = "seurat_clusters",
split.by = "seurat_clusters",
stack = TRUE,
flip = TRUE,
cols = cluster_colors,
features = top3$gene)
v1
v2 <- VlnPlot(group2,
group.by = "seurat_clusters",
split.by = "seurat_clusters",
stack = TRUE,
flip = TRUE,
cols = cluster_colors,
features = c("C1qa","Ms4a7","Nkg7","Cd3e","Trac","Il23r","Ms4a1",
"Fcrla","Fgr","Flt1","Flt3"))
v2
pdf(paste0(out, "markers/auto_top3_markers_violin_unannotated.pdf"), width = 8, height = 6)
v1
dev.off()
## png
## 2
pdf(paste0(out, "markers/cluster_markers_violin_unannotated.pdf"), width = 8, height = 6)
v2
dev.off()
## png
## 2
# Rename all identities
Idents(group2) <- "seurat_clusters"
group2.annotated <- RenameIdents(object = group2,
"0" = "Macrophages", # C1qa,Ms4a7
"1" = "T cells", # Cd3e,Trac
"2" = "B cells", # Ms4a1,Fcrla
"3" = "Monocytes?", # Fgr
"4" = "B cells?", # Fcrla
"5" = "T cells", # Cd3e
"6" = "Contaminating endothelial", # Flt1
"7" = "DCs") # Flt3
group2.annotated$annotated_clusters <- Idents(group2)
Idents(group2.annotated) <- "annotated_clusters"
# set colors
cluster_colors <- c("black","gray","chocolate4","red","gold","green","forestgreen",
"cyan","blue","steelblue","pink","purple","deeppink")
# umap
umap <- DimPlot(object = group2.annotated,
reduction = "umap",
repel = TRUE,
group.by = "annotated_clusters",
cols = cluster_colors)
umap